Learning network analysis. Networks are everywhere: the term can refer to road maps, urban planning of bus nodes, inter- and intra-zonal transport, shipping routes, air flight routes, sewerage and water lines, and social networks.
The network is built using statistics to determine the number of nodes, complexity, visual clustering and closeness of the nodes. Betweenness refers to the importance of a node / concentration risk.
Closeness identifies the most central node, i.e. the one that requires the fewest steps to travel from one end of the network to the other.
The centrality index is based on the number of flights from one airport to another airport.
Bipartite graph - A graph of analysis of executives to reveal the connections between the board of directors
Neo4j is a NoSQL graph database, used to store data as a graph of nodes and relationships.
tidygraph is a Tidy API for graph manipulation.
Node size, Label, Shapes, Images, Border colour, Weight of line, line colour, line type all can be customised to value-add to the network visualisation.
Network visualisation basic attributes
tidygraph helps to build up the statistics for the network graph. visNetwork is JavaScript-based and provides interactivity for the visualisation. igraph is the older package; tidygraph provides a tidy interface on top of it and is preferred here. lubridate handles dates and clock handles time.
# Packages used in these notes: graph manipulation (tidygraph), static network
# plots (ggraph), interactive network plots (visNetwork), date and time
# handling (lubridate, clock) and general data wrangling (tidyverse).
packages <- c(
  "tidygraph",
  "ggraph", "visNetwork",
  "lubridate", "clock",
  "tidyverse"
)

# Install whatever is missing, then attach every package.
for (p in packages) {
  # requireNamespace() is a pure availability check; attaching is left to
  # library() below, which fails loudly if the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Load the node table and the edge table from the data folder.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Show the columns and column types of the edge table.
glimpse(x = GAStech_edges)
Rows: 9,063
Columns: 8
$ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, ~
$ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, ~
$ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", ~
$ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:0~
$ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-Seismic~
$ MainSubject <chr> "Work related", "Work related", "Work related", ~
$ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "~
$ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "~
# Parse SentDate from a day/month/year character string into a Date.
GAStech_edges$SentDate <- dmy(GAStech_edges$SentDate)

# Derive the full (unabbreviated) weekday name as an ordered factor.
GAStech_edges$Weekday <- wday(
  GAStech_edges$SentDate,
  label = TRUE,
  abbr = FALSE
)

# SentDate is now a <date> column (date only, no time component) and the new
# Weekday column is an <ord> factor.
head(GAStech_edges, 3)
# A tibble: 3 x 9
source target SentDate SentTime Subject MainSubject sourceLabel
<dbl> <dbl> <date> <time> <chr> <chr> <chr>
1 43 41 2014-01-06 08:39 GT-Seismi~ Work relat~ Sven.Flecha
2 43 40 2014-01-06 08:39 GT-Seismi~ Work relat~ Sven.Flecha
3 44 51 2014-01-06 08:58 Inspectio~ Work relat~ Kanon.Herr~
# ... with 2 more variables: targetLabel <chr>, Weekday <ord>
The Weekday has also been extracted and stored as an ordered factor (ordinal scale).
# Count the work-related emails for every sender/recipient pair and weekday.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  # .groups = "drop" returns an ungrouped tibble directly, so no trailing
  # ungroup() is needed and summarise() does not emit its regrouping message.
  summarise(Weight = n(), .groups = "drop") %>%
  # Remove emails that employees sent to themselves.
  filter(source != target) %>%
  # Keep only pairs that exchanged more than one email on that weekday.
  filter(Weight > 1)

glimpse(GAStech_edges_aggregated)
Rows: 1,456
Columns: 4
$ source <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
$ target <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6~
$ Weekday <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday,~
$ Weight <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3~
# Combine the node table and the aggregated edge table into one graph object.
# directed = TRUE keeps the direction of every edge so it can be displayed.
GAStech_graph <- tbl_graph(
  edges = GAStech_edges_aggregated,
  nodes = GAStech_nodes,
  directed = TRUE
)

# Print the graph summary together with the node and edge tables.
GAStech_graph
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
4 4 Linda.Lagos Administration Assistant to COO
5 5 Ruscella.Mies.Haber Administration Assistant to Engineering G~
6 6 Carla.Forluniau Administration Assistant to IT Group Mana~
# ... with 48 more rows
#
# Edge Data: 1,456 x 4
from to Weekday Weight
<int> <int> <ord> <int>
1 1 2 Monday 4
2 1 2 Tuesday 3
3 1 2 Wednesday 5
# ... with 1,453 more rows
# Make the edge table the active one, then list the heaviest edges first.
arrange(
  activate(GAStech_graph, edges),
  desc(Weight)
)
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Edge Data: 1,456 x 4 (active)
from to Weekday Weight
<int> <int> <ord> <int>
1 40 41 Tuesday 23
2 40 43 Tuesday 19
3 41 43 Tuesday 15
4 41 40 Tuesday 14
5 42 41 Tuesday 13
6 42 40 Tuesday 12
# ... with 1,450 more rows
#
# Node Data: 54 x 4
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
# ... with 51 more rows
# Minimal network plot: straight edges and point nodes with default styling.
ggraph(graph = GAStech_graph) +
  geom_edge_link() +
  geom_node_point()
# Network plot with fixed grey edges and nodes on a dark background.
# The colours are constants, so they are set as layer parameters rather than
# inside aes(): a constant inside aes() is treated as a data value, which
# draws the default palette colour and adds a meaningless legend.
g <- ggraph(GAStech_graph) +
  geom_edge_link(edge_colour = "grey50") +
  geom_node_point(colour = "grey40")

g + theme_graph(
  background = "grey10",
  text_colour = "white"
)
# Same basic plot, but with the Fruchterman-Reingold ("fr") layout.
g <- ggraph(graph = GAStech_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point()

g + theme_graph()
# Colour the nodes by department.
# size = 3 is a constant, so it is set outside aes(); inside aes() it would be
# mapped as data and add a spurious "size" legend entry.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(colour = Department),
    size = 3
  )

g + theme_graph()
# Map the edge width to the number of emails (Weight) and colour the nodes by
# department.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(
    aes(width = Weight),
    alpha = 0.2
  ) +
  # Minimum and maximum line width used for the edges.
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    aes(colour = Department),
    size = 3
  )

# theme_graph() is a complete theme and replaces any theme settings added
# before it, so the legend position has to be set after it to take effect.
g +
  theme_graph() +
  theme(legend.position = "bottom")
# Base plot: edge width follows Weight, nodes are coloured by department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(
    mapping = aes(width = Weight),
    alpha = 0.2
  ) +
  # Minimum and maximum thickness of the edge lines.
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    mapping = aes(colour = Department),
    size = 3
  )

# One panel per weekday; every panel shows only that weekday's edges.
g +
  facet_edges(~Weekday) +
  th_foreground(
    border = TRUE,
    foreground = "grey80"
  ) +
  theme(legend.position = "bottom")
# Base plot: edge width follows Weight, nodes are coloured by department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(
    mapping = aes(width = Weight),
    alpha = 0.2
  ) +
  # Minimum and maximum thickness of the edge lines.
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    mapping = aes(colour = Department),
    size = 3
  )

# One panel per department.
g +
  facet_nodes(~Department) +
  th_foreground(
    border = TRUE,
    foreground = "grey80"
  ) +
  theme(legend.position = "bottom")
# Add betweenness centrality as a node attribute and use it for node size.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    mapping = aes(width = Weight),
    alpha = 0.2
  ) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    mapping = aes(
      colour = Department,
      size = betweenness_centrality
    )
  )

g + theme_graph()
# Assign every node to a community found by edge-betweenness grouping
# (weighted by Weight, respecting edge direction) and colour nodes by it.
g <- GAStech_graph %>%
  mutate(
    community = as.factor(
      group_edge_betweenness(weights = Weight, directed = TRUE)
    )
  ) %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    mapping = aes(width = Weight),
    alpha = 0.2
  ) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = community))

g + theme_graph()
*Creating a new aggregated table that contains the from and to fields. visNetwork has a rigid data structure: it only recognises the edge column names "from" and "to", so the data columns need to be renamed as well.
# Rebuild the aggregated edge table with "from" and "to" id columns and a
# "weight" column counting work-related emails per sender/recipient pair.
GAStech_edges_aggregated <- GAStech_edges %>%
  # Only id and label are needed from the node table; joining the full table
  # twice would add redundant, suffixed Department/Title columns.
  left_join(
    select(GAStech_nodes, id, label),
    by = c("sourceLabel" = "label")
  ) %>%
  rename(from = id) %>%
  left_join(
    select(GAStech_nodes, id, label),
    by = c("targetLabel" = "label")
  ) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  # .groups = "drop" returns an ungrouped tibble, so no ungroup() is needed.
  summarise(weight = n(), .groups = "drop") %>%
  # Remove emails that employees sent to themselves.
  filter(from != to) %>%
  # Keep only pairs that exchanged more than one email.
  filter(weight > 1)

glimpse(GAStech_edges_aggregated)
Rows: 839
Columns: 3
$ from <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
$ to <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1~
$ weight <int> 21, 21, 21, 21, 21, 21, 15, 15, 15, 15, 15, 15, 15, 1~
# Interactive network with visNetwork's default layout.
visNetwork(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated
)

# Same network, positioned with igraph's Fruchterman-Reingold layout.
visNetwork(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated
) %>%
  visIgraphLayout(layout = "layout_with_fr")
# Rename Department to "group", the node column that visNetwork uses to colour
# nodes and to build the legend.
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department)

# The node coordinates are computed by visIgraphLayout(), so the seed has to be
# passed there to make the layout reproducible; the seed in visLayout() alone
# does not control the igraph layout.
visNetwork(
  GAStech_nodes,
  GAStech_edges_aggregated
) %>%
  visIgraphLayout(
    layout = "layout_with_fr",
    randomSeed = 123
  ) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)
# Interactive network with arrow heads on the receiving end of every edge and
# clockwise-curved edges.
# The node coordinates are computed by visIgraphLayout(), so the seed has to be
# passed there to make the layout reproducible; the seed in visLayout() alone
# does not control the igraph layout.
visNetwork(
  GAStech_nodes,
  GAStech_edges_aggregated
) %>%
  visIgraphLayout(
    layout = "layout_with_fr",
    randomSeed = 123
  ) %>%
  visEdges(
    arrows = "to",
    smooth = list(
      enabled = TRUE,
      type = "curvedCW"
    )
  ) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)
# Additional packages for the chord diagram section.
packages <- c(
  "circlize",
  "chorddiag"
)

# Install whatever is missing, then attach every package.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    if (p == "chorddiag") {
      # chorddiag is not on CRAN, so install.packages() cannot install it.
      # Stop with instructions instead of failing later inside library().
      stop(
        "Package 'chorddiag' is not available on CRAN. Install it with ",
        "devtools::install_github(\"mattflor/chorddiag\") and run this ",
        "section again.",
        call. = FALSE
      )
    }
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

# Load the bilateral migration data.
mig_data <- read_csv("data/bilateral_migration2017.csv")
Run devtools::install_github("mattflor/chorddiag") to install chorddiag, as it is not available on CRAN.
Continue from notes…